#Notes ## Notes about phenotyping csv file generation Uninfected phenotyping data was generated via flow cytometry gating to cell subset from alive cells. Infected phenotyping data was generated by gating to alive, then to only IAV+ cells (HA or NP positive), then to cell subset.

A “round” indicates an independent differentiation of NHBE cells.

Notes about flow cytometry csv files

Channel value csv files for each manually gated population were exported from FlowJo. A column was manually added that indicates what gated population the events belong to. Uninfected cells were gated from the alive population, infected cells were gated from the alive, then HA and/or NP positive populations. All csv files corresponding to the same well (N=3 wells each for infected and uninfected wells) were concatenated and saved as a single csv file.

Figure 1

library(ggplot2)
library(Spectre)
library(Rmisc)
## Loading required package: lattice
## Loading required package: plyr
library(forcats)
library(vegan)
## Loading required package: permute
## This is vegan 2.6-4
library(agricolae)
## Registered S3 methods overwritten by 'klaR':
##   method      from 
##   predict.rda vegan
##   print.rda   vegan
##   plot.rda    vegan
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
## 
## Attaching package: 'factoextra'
## The following object is masked from 'package:agricolae':
## 
##     hcut
library(readxl)
library(readr)
library(ggalluvial)
library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggpubr)
## 
## Attaching package: 'ggpubr'
## The following object is masked from 'package:plyr':
## 
##     mutate
library(broom)



colors <- c("#1080FF", "#8000FF", "#FB02FF", "#FD8008", "#FECC66", "#FC6FCF", "#FFFF0A", "#FFFFFF", "#108080")

setwd("/Users/sheph085/Documents/postdoc_projects/FN_nHTBE/Manuscripts/Code/scripts")

Figure 1A

Shows bar plot comparing uninfected cell type frequencies between bronchial (NHBE) and nasal (NHNE) primary cells. Preinfection phenotyping data from round 7 was used to generate the bar plots comparing NHBE and NHNE in Figure 1A.

#CSV file contains all the infected and uninfected phenotyping data included in the paper:
#Uninfected phenotyping data includes 7 independent differentiations (rounds 3-9)
#Cal/07/09 infection (MOI=0.5) and phenotyping data was generated for rounds 3,4,7, and 9
#Round 3 includes an additional Cal/07/09 infection done at MOI=1.

#Read in data and calculate average and standard deviation of cell subset frequencies for each round/week/moi/condition combination. 
phenotyping_dat <- read.csv("../data/pre_and_post_infection_phenotyping_data.csv", stringsAsFactors = TRUE, header = TRUE) %>%
  group_by(round, week, moi, infection, cell_origin, condition) %>%
  mutate(round = factor(round),
         week = factor(week, levels = c("0", "1", "2", "3")), 
         cell_type = factor(cell_type, levels = c("Ciliated", "Secretory", "Basal", "Triple Negative", "CD66c+ Other", "CD66c- Other", "Goblet", "CD271+ Ciliated", "CD66c+ Basal")),
         sample_id = cur_group_id()) %>%
  group_by(sample_id, round, week, moi, infection, cell_origin, condition, cell_type) %>%
  dplyr::summarise(avg_freq_of_alive = mean(frequency_of_alive),
            sd = sd(frequency_of_alive)) %>%
  mutate(SDpos = rev(cumsum(rev(avg_freq_of_alive))))
## `summarise()` has grouped output by 'sample_id', 'round', 'week', 'moi',
## 'infection', 'cell_origin', 'condition'. You can override using the `.groups`
## argument.
#Each combination of a round, week, "moi" and treatment consists of 2-4 culture wells (technical replicates).
#To get error bars to be placed correctly on bar graph, some additional data wrangling is required as well.
uninfected_dat <- phenotyping_dat %>%
  filter(infection == "uninfected" & moi == 0.5)
#Just NHBE and NHNE
uninfected_dat %>%
  mutate(week = factor(week),
         condition_origin = paste0(condition, "-", cell_origin),
         condition_origin = factor(condition_origin, levels = c("pneumacult-nhbe", "il13_low-nhbe", "il13_high-nhbe", "dapt-nhbe","ifn-nhbe", "lonza-nhbe", "pneumacult-nhne", "il13_low-nhne", "il13_high-nhne", "dapt-nhne"))) %>%  
  filter(round %in% c("7"),
         week %in% c("3"),
         condition == "pneumacult") %>%
  droplevels() %>%
  ggplot(aes(x = condition_origin, y = avg_freq_of_alive, fill = cell_type)) +
    geom_bar(stat = "identity", color = "black") +
    geom_errorbar(aes(ymin = SDpos-sd, ymax = SDpos+sd), width = 0.2, position = "identity") + 
    scale_fill_manual(values = colors) +
    scale_y_continuous(limits = c(0,100), expand = c(0,0)) +
    ggtitle("NHBE vs NHNE pre-infection treatment comparisons\nRound 7") +
    labs(y = "Average frequency of alive",
         x = NULL,
         fill = "Cell type",
         caption = "Error bars show standard deviation of 2-4 wells per treatment/round") +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1),
          panel.grid = element_blank())

Figure 1B

Shows UMAP of flow cytometry data. Flow cytometry channel value data from round 7 Pneumacult treated cells was used to generate UMAPs displaying the manually gated populations and marker staining. The UMAP is a combination of uninfected cells (n=3 wells) and infected cells (n=3 wells). Figure 1B in the manuscript only shows the uninfected cells, but both are used to generate the UMAP in order to integrate the cluster positions for comparison with figure 2A.

#Read flowjo metadata
meta <- read.csv("../data/figure_1_metadata.csv", header = TRUE)

#Import csv files
data.list <- Spectre::read.files(file.loc = "../data/round_7_channel_value_files/", file.type = ".csv")
## Loading required package: data.table
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
#Merge data
cell.dat <- Spectre::do.merge.files(dat = data.list)
cell.dat
##         FSC-A FSC-H FSC-W SSC-A SSC-H SSC-W SiR-Tub Cal NP Cal HA CD66c TSPAN8
##      1:    72    65   384    78    65   353     184    153    205   243    151
##      2:    92    82   394    64    56   324     174    147    213   198    164
##      3:   100    82   416    64    53   343     195    154    199   166    145
##      4:    72    45   470   112    80   409     239    140    235   275    204
##      5:    70    52   420    93    76   367     194    150    236   263    172
##     ---                                                                       
## 185490:    74    51   438    64    42   407     218    143    220   187    147
## 185491:   176   143   446    40    34   314     138    150    192   223    148
## 185492:    68    54   422   103    77   400     216    145    228   233    189
## 185493:   159   133   433    48    36   368     152    146    209   262    156
## 185494:    72    50   431    65    51   349     145    146    205   256    155
##         CD271 Comp-UV Blue L_D-A Time       cell_type                 FileName
##      1:   549                226    0           Basal     Round_7_Pneumacult_1
##      2:   549                214    0           Basal     Round_7_Pneumacult_1
##      3:   579                182    0           Basal     Round_7_Pneumacult_1
##      4:   543                257    0           Basal     Round_7_Pneumacult_1
##      5:   542                209    0           Basal     Round_7_Pneumacult_1
##     ---                                                                       
## 185490:   161                306  995 Triple negative Round_7_Pneumacult_Cal_6
## 185491:   147                249  995 Triple negative Round_7_Pneumacult_Cal_6
## 185492:   158                358  996 Triple negative Round_7_Pneumacult_Cal_6
## 185493:   138                226  996 Triple negative Round_7_Pneumacult_Cal_6
## 185494:   138                238  996 Triple negative Round_7_Pneumacult_Cal_6
##         FileNo
##      1:      1
##      2:      1
##      3:      1
##      4:      1
##      5:      1
##     ---       
## 185490:      6
## 185491:      6
## 185492:      6
## 185493:      6
## 185494:      6
#Add metadata
sample.info <- meta[,c(1:4, 6)]

cell.dat <- do.add.cols(cell.dat, "FileName", sample.info, "FileName", rmv.ext = TRUE)
## Removing '.csv' or '.fcs' extension
## Step 1/3. Mapping data
## Step 2/3. Merging data
## Step 3/3. Returning data
#Define what markers to use to cluster cells (all markers except for HA and NA: SiR-Tub, CD66c, TSPAN8, CD271)
cluster.cols <- names(cell.dat)[c(7,10:12)]

#Define variables that contain metadata
exp.name <- "Round 7"
sample.col <- "sample"
treatment.col <- "treatment"
condition.col <- "condition"


data.frame(table(cell.dat[[condition.col]])) # Check number of cells per infection condition.
##         Var1  Freq
## 1   infected 87512
## 2 uninfected 97982
#Define target for subsampling. We have 87,000 cells in all uninfected and ~98,000 cells in all infected conditions. Here we will use 20,000 cells per target for graphing (but all cells are used to generate the clusters).
sub.targets <- c(20000, 20000)


#Run flowSOM clustering algorithm on full dataset, generate 9 clusters (same number that we have in our dataset by manual gating)
clustered.dat <- run.flowsom(cell.dat, cluster.cols, meta.k = 9)
## Loading required package: Biobase
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: FlowSOM
## Loading required package: igraph
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:BiocGenerics':
## 
##     normalize, path, union
## The following objects are masked from 'package:dplyr':
## 
##     as_data_frame, groups, union
## The following object is masked from 'package:tidyr':
## 
##     crossing
## The following object is masked from 'package:agricolae':
## 
##     similarity
## The following object is masked from 'package:vegan':
## 
##     diversity
## The following object is masked from 'package:permute':
## 
##     permute
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
## Thanks for using FlowSOM. From version 2.1.4 on, the scale 
## parameter in the FlowSOM function defaults to FALSE
## Preparing data
## Starting FlowSOM
## Building SOM
## Mapping data to SOM
## Building MST
## Binding metacluster labels to starting dataset
## Binding cluster labels to starting dataset
clustered.dat
##         FSC-A FSC-H FSC-W SSC-A SSC-H SSC-W SiR-Tub Cal NP Cal HA CD66c TSPAN8
##      1:    72    65   384    78    65   353     184    153    205   243    151
##      2:    92    82   394    64    56   324     174    147    213   198    164
##      3:   100    82   416    64    53   343     195    154    199   166    145
##      4:    72    45   470   112    80   409     239    140    235   275    204
##      5:    70    52   420    93    76   367     194    150    236   263    172
##     ---                                                                       
## 185490:    74    51   438    64    42   407     218    143    220   187    147
## 185491:   176   143   446    40    34   314     138    150    192   223    148
## 185492:    68    54   422   103    77   400     216    145    228   233    189
## 185493:   159   133   433    48    36   368     152    146    209   262    156
## 185494:    72    50   431    65    51   349     145    146    205   256    155
##         CD271 Comp-UV Blue L_D-A Time       cell_type                 FileName
##      1:   549                226    0           Basal     Round_7_Pneumacult_1
##      2:   549                214    0           Basal     Round_7_Pneumacult_1
##      3:   579                182    0           Basal     Round_7_Pneumacult_1
##      4:   543                257    0           Basal     Round_7_Pneumacult_1
##      5:   542                209    0           Basal     Round_7_Pneumacult_1
##     ---                                                                       
## 185490:   161                306  995 Triple negative Round_7_Pneumacult_Cal_6
## 185491:   147                249  995 Triple negative Round_7_Pneumacult_Cal_6
## 185492:   158                358  996 Triple negative Round_7_Pneumacult_Cal_6
## 185493:   138                226  996 Triple negative Round_7_Pneumacult_Cal_6
## 185494:   138                238  996 Triple negative Round_7_Pneumacult_Cal_6
##         FileNo sample round treatment  condition FlowSOM_cluster
##      1:      1   1_PC     7        PC uninfected             177
##      2:      1   1_PC     7        PC uninfected             177
##      3:      1   1_PC     7        PC uninfected             192
##      4:      1   1_PC     7        PC uninfected             149
##      5:      1   1_PC     7        PC uninfected             164
##     ---                                                         
## 185490:      6   6_PC     7        PC   infected             155
## 185491:      6   6_PC     7        PC   infected             141
## 185492:      6   6_PC     7        PC   infected             155
## 185493:      6   6_PC     7        PC   infected             141
## 185494:      6   6_PC     7        PC   infected             141
##         FlowSOM_metacluster
##      1:                   8
##      2:                   8
##      3:                   8
##      4:                   8
##      5:                   8
##     ---                    
## 185490:                   5
## 185491:                   5
## 185492:                   5
## 185493:                   5
## 185494:                   5
#Create subsampled dataset for UMAP visualization
cell.sub <- do.subsample(clustered.dat, sub.targets, treatment.col)
cell.sub
##        FSC-A FSC-H FSC-W SSC-A SSC-H SSC-W SiR-Tub Cal NP Cal HA CD66c TSPAN8
##     1:   103    57   477   158   114   447     384    164    279   492    201
##     2:   575   306   710   718   394   693     784    134    347   625    219
##     3:   137   105   437    88    74   364     197    157    459   262    140
##     4:    70    63   383    77    67   345     184    149    197   203    167
##     5:    77    58   411    59    52   323     181    143    197   135    151
##    ---                                                                       
## 19996:   170   106   496   143    98   434     278    158    268   289    162
## 19997:   125   100   422    80    62   444     190    153    196   203    158
## 19998:    68    54   450   200   135   477     225    419    605   311    164
## 19999:   109    85   417    90    72   379     240    163    391   275    152
## 20000:   587   279   782   421   170   724     210    144    335   402    223
##        CD271 Comp-UV Blue L_D-A Time    cell_type                 FileName
##     1:   168                252  683    Secretory     Round_7_Pneumacult_2
##     2:   287                350    9     Ciliated     Round_7_Pneumacult_2
##     3:   648                211  160        Basal Round_7_Pneumacult_Cal_4
##     4:   340                200   83 CD66c- other     Round_7_Pneumacult_3
##     5:   481                199  898 CD66c- other     Round_7_Pneumacult_2
##    ---                                                                    
## 19996:   149                213  430    Secretory     Round_7_Pneumacult_2
## 19997:   662                213  646        Basal Round_7_Pneumacult_Cal_5
## 19998:   209                197  712    Secretory Round_7_Pneumacult_Cal_6
## 19999:   654                236  703        Basal Round_7_Pneumacult_Cal_4
## 20000:   146                341  949    Secretory     Round_7_Pneumacult_1
##        FileNo sample round treatment  condition FlowSOM_cluster
##     1:      2   2_PC     7        PC uninfected              47
##     2:      2   2_PC     7        PC uninfected              17
##     3:      4   4_PC     7        PC   infected             152
##     4:      3   3_PC     7        PC uninfected             172
##     5:      2   2_PC     7        PC uninfected             189
##    ---                                                         
## 19996:      2   2_PC     7        PC uninfected             101
## 19997:      5   5_PC     7        PC   infected             195
## 19998:      6   6_PC     7        PC   infected             115
## 19999:      4   4_PC     7        PC   infected             153
## 20000:      1   1_PC     7        PC uninfected              62
##        FlowSOM_metacluster
##     1:                   5
##     2:                   2
##     3:                   8
##     4:                   5
##     5:                   5
##    ---                    
## 19996:                   5
## 19997:                   8
## 19998:                   5
## 19999:                   8
## 20000:                   5
#Create UMAP
cell.sub <- run.umap(cell.sub, cluster.cols, neighbours = 50)
## Loading required package: umap
## [2023-08-21 15:55:13]  starting umap
## [2023-08-21 15:55:13]  creating graph of nearest neighbors
## [2023-08-21 15:57:54]  creating initial embedding
## [2023-08-21 15:58:01]  optimizing embedding
## [2023-08-21 15:59:23]  done
#A little data wrangling for graphing
cell.sub$cell_type <- factor(cell.sub$cell_type, levels = c("Ciliated", "Secretory", "Basal", "Triple negative", "CD66c+ other", "CD66c- other", "Goblet", "CD271+ ciliated", "CD66c+ basal"))
cell.sub$orderrank_np <- rank(cell.sub$`Cal NP`,ties.method="first")
cell.sub$orderrank_ha <- rank(cell.sub$`Cal HA`,ties.method="first")
cell.sub$condition <- factor(cell.sub$condition, levels = c("uninfected", "infected"))
cell.sub$FlowSOM_metacluster <- factor(cell.sub$FlowSOM_metacluster)
#Make graph of UMAP colored by populations assigned via manual gating in FlowJo
ggplot(data = cell.sub,
       aes(x = UMAP_X, y = UMAP_Y, color = cell_type)) +
  geom_point(size = 0.5) +
  facet_grid(. ~ condition) +
  scale_color_manual(values = c("#1080FF", "#8000FF", "#FB02FF", "#FD8008", "#FECC66", "#FC6FCF", "#FFFF0A", "black", "#108080")) +
  ggtitle("Round 7 Pneumacult UMAP\nFlow phenotyping by cell type") +
  theme_bw() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank()) +
  coord_fixed()

ggplot(data = cell.sub,
       aes(x = UMAP_X, y = UMAP_Y, color = `SiR-Tub`)) +
  geom_point(alpha = 0.4) +
  scale_color_distiller(palette = "Spectral")+
  facet_grid(. ~ condition) +
  ggtitle("Round 7 Pneumacult UMAP\nFlow phenotyping by SiR-Tub staining") +
  theme_bw() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank()) +
  coord_fixed()

ggplot(data = cell.sub,
       aes(x = UMAP_X, y = UMAP_Y, color = `CD66c`)) +
  geom_point(alpha = 0.4) +
  scale_color_distiller(palette = "Spectral") +
  facet_grid(. ~ condition) +
  ggtitle("Round 7 Pneumacult UMAP\nFlow phenotyping by CD66c staining") +
  theme_bw() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank()) +
  coord_fixed()

ggplot(data = cell.sub,
       aes(x = UMAP_X, y = UMAP_Y, color = `TSPAN8`)) +
  geom_point(alpha = 0.4) +
  scale_color_distiller(palette = "Spectral") +
  facet_grid(.~condition) +
  ggtitle("Round 7 Pneumacult UMAP\nFlow phenotyping by TSPAN8 staining") +
  theme_bw() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank()) +
  coord_fixed()

ggplot(data = cell.sub,
       aes(x = UMAP_X, y = UMAP_Y, color = `CD271`)) +
  geom_point(alpha = 0.4) +
  scale_color_distiller(palette = "Spectral") +
  facet_grid(.~condition) +
  ggtitle("Round 7 Pneumacult UMAP\nFlow phenotyping by CD271 staining") +
  theme_bw() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank()) +
  coord_fixed()

Figure 1C

Shows heatmap comparing changes in cell type frequencies each week during differentiation. The timecourse experiments were performed from two independent differentiations (round 6 and 7) of NHBE cells, and one independent differentiation of NHNE cells. The exact timepoints/cells included were: * NHBE differentiation, phenotyped at weeks 0, 1, 2, and 3 * NHBE differentiation, phenotyped at weeks 0, 1, and 3 * NHNE differentiation, phenotyped at weeks 0, 1, 2, and 3

Data in paper is from the “round 6” NHBE differentiation, which is one of two experiments where a timecourse was performed. But all sets of timecourse data are included/graphed below.

uninfected_dat %>%
  filter(round %in% c("6", "7"), #Filter down to just the timecourse data from rounds 6 and 7
         condition == "pneumacult",
         !(round == "7" & cell_origin == "nhne")) %>%
  droplevels() %>%
  group_by(round, cell_origin, cell_type) %>%
  mutate(fold_change_vs_wk0 = avg_freq_of_alive/avg_freq_of_alive[match('0', week)], #Calculate change in frequency compared to week 0 for each cell type
         log_fold_change_vs_wk0 = log10(fold_change_vs_wk0)) %>% #Log transform
  filter(week != 0) %>% #Filter week 0 since it is zero log fold change
  ggplot(aes(x = week, 
             y = fct_rev(cell_type),
             fill = log_fold_change_vs_wk0)) +
  geom_tile(color = "black") +
  scale_fill_gradient2(name = "Log10 fold change in frequency\nvs Week 0",
                      high="red", mid = "white", low = "#333333") + 
  facet_wrap(round~cell_origin, ncol = 2) +
  theme_bw() +
  ggtitle("Cell type frequency changes over time\n(log10 fold change vs Week 0)")+
  labs(x = "Week",
       y = "Cell type") +
  theme(plot.title = element_text(size = 12),
        panel.grid.major = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1),
        axis.text = element_text(size = 10))

Figure 1D

Shows a box and whiskers plot of Shannon’s alpha diversity indices calculated over time in the dataset.

#Alpha values are calculated here for infected AND uninfected data. Figure 1 only includes alpha data from uninfected replicates. Infected alpha values are depicted in Figure 2. 
#Manipulate uninfected phenotyping df so that sample IDs are column names and row names are the cell type
count_dat <- phenotyping_dat %>%
  ungroup() %>%
  select(sample_id,cell_type,avg_freq_of_alive) %>%
  tidyr::spread(sample_id, avg_freq_of_alive)
  
#Calculate simpson alpha index for each sample from the above manipulated dataframe
alpha <- vegan::diversity(count_dat[,-1], MARGIN = 2, index = "simpson")
alpha <- as.data.frame(alpha)

#Create new df that contains alpha information for each replicate
colnames(alpha) <- "alpha"
alpha <- merge(phenotyping_dat %>%
                          select(sample_id, round, week, moi, infection, cell_origin, condition) %>%
                          distinct(., sample_id, round, week, moi, infection, cell_origin, condition) %>%
                          tibble::column_to_rownames('sample_id'),
                        alpha,
                        by = "row.names")

#Function to position annotations above bar plots
give.n <- function(x){
  return(c(y = median(x)*1.2, label = length(x))) 
  # experiment with the multiplier to find the perfect position
}

#Create plot of just the uninfected data
alpha %>%
  filter(infection == "uninfected" & moi == 0.5) %>% #Filter infected data from df
  ggplot(aes(x = week, y = alpha)) +
    geom_boxplot()+
    ggtitle("Simpson's alpha diversity index by week\nRounds 3-9, Week 3") +
    stat_summary(fun.data = give.n, geom = "text", position = position_dodge(width = 0.75), fun.y = median) +
    scale_y_continuous(limits = c(0.3,1), expand = c(0,0)) +
    theme(panel.grid = element_blank(),
          panel.background = element_blank(),
          axis.line = element_line(colour = "black")) +
    labs(x = "Weeks post-ALI",
         y = "Simpson's alpha diversity index",
         caption = "Numbers above boxes = # replicates")
## Warning: The `fun.y` argument of `stat_summary()` is deprecated as of ggplot2 3.3.0.
## ℹ Please use the `fun` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Figure 1E

Shows heatmap comparing changes in cell type frequencies among the different treatment conditions within an independent differentiation (“round”). The heatmaps show relative changes in cell type frequencies versus a baseline of the Pneumacult treatment group. All rounds are graphed here, but only round 7 data is shown in Figure 1E.

First a comparison of all rounds. The NHBE and NHNE cells are graphed separately.

#All rounds
uninfected_dat %>%
  filter(week == "3") %>%
  group_by(round, cell_origin, cell_type) %>%
  mutate(fold_change_vs_pc = avg_freq_of_alive/avg_freq_of_alive[match('pneumacult', condition)]) %>%
  mutate(condition = factor(condition, levels = c("pneumacult", "il13_low", "il13_high", "dapt", "lonza", "ifn")),
         round_cell_origin = paste(round, cell_origin, sep = "_"),
         log_fold_change_vs_pc = log10(fold_change_vs_pc)) %>%
  ggplot(aes(x = condition, 
             y = fct_rev(cell_type),
             fill = log_fold_change_vs_pc)) +
  geom_tile(color = "black") +
  scale_fill_gradient2(name = "Log10 fold change in frequency\nvs Pneumacult",
                      high="red", mid = "white", low = "#333333") + 
  facet_grid(round~cell_origin) +
  theme_bw() +
  ggtitle("Cell type frequency changes after treatments\nWeek 3 (log10 fold change vs Pneumacult)")+
  labs(y = "Cell Type",
       x = "Condition") +
  theme(plot.title = element_text(size = 12),
        panel.grid.major = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1),
        axis.text = element_text(size = 10))

#Round 7 only, remove the NHBE Pneumacult column because it is all zero (it is log10 fold change vs itself)
#Also include the Pneumacult NHNE group, instead of graphing NHBE and NHNE separately
uninfected_dat %>%
  filter(week == "3" & round == "7") %>%
  mutate(condition_origin = paste0(condition, "-", cell_origin),
         condition_origin = factor(condition_origin, levels = c("pneumacult-nhbe", "il13_low-nhbe", "il13_high-nhbe", "dapt-nhbe","ifn-nhbe", "lonza-nhbe", "pneumacult-nhne", "il13_low-nhne", "il13_high-nhne", "dapt-nhne"))) %>%
  group_by(cell_type) %>%
  mutate(fold_change_vs_pc = avg_freq_of_alive/avg_freq_of_alive[match('pneumacult-nhbe', condition_origin)]) %>%
  mutate(log_fold_change_vs_pc = log10(fold_change_vs_pc)) %>%
  filter(condition_origin != "pneumacult-nhbe") %>%
  ggplot(aes(x = condition_origin, 
             y = fct_rev(cell_type),
             fill = log_fold_change_vs_pc)) +
  geom_tile(color = "black") +
  scale_fill_gradient2(limits = c(-3,2), name = "Log10 fold change in frequency\nvs Pneumacult",
                      high="red", mid = "white", low = "#333333") + 
  theme_bw() +
  ggtitle("Cell type frequency changes after treatments\nWeek 3 (log10 fold change vs Pneumacult)")+
  labs(y = "Cell Type",
       x = "Condition") +
  theme(plot.title = element_text(size = 12),
        panel.grid.major = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1),
        axis.text = element_text(size = 10))

Figure 1F

Shows a box and whiskers plot of Shannon’s alpha diversity indices calculated for all the week 3 data, combining rounds and conditions.

Since only a single replicate of NHNE differentiations at conditions other than Pneumacult was performed, only NHBE Pneumacult treated conditions are included.

#first, do ANOVA to compare treatment group alpha diversities preinfection
#Do ANOVA to look for differences
anova_result <- aov(alpha ~ condition_origin, 
                    alpha %>%
                      filter(infection == "uninfected" & week == "3") %>%
                      mutate(condition_origin = paste0(condition, "-", cell_origin),
                             condition_origin = factor(condition_origin, levels = c("pneumacult-nhbe", "il13_low-nhbe", "il13_high-nhbe", "dapt-nhbe","ifn-nhbe", "lonza-nhbe", "pneumacult-nhne", "il13_low-nhne", "il13_high-nhne", "dapt-nhne"))) %>%
                      filter(condition_origin %in% c("pneumacult-nhbe", "il13_low-nhbe", "il13_high-nhbe", "dapt-nhbe","ifn-nhbe", "lonza-nhbe", "pneumacult-nhne")))
summary(anova_result)
##                  Df Sum Sq Mean Sq F value   Pr(>F)    
## condition_origin  5  0.245 0.04900   7.681 0.000306 ***
## Residuals        21  0.134 0.00638                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Tukey posthoc test
tukey_result <- HSD.test(anova_result, "condition_origin", group = TRUE)
print(tukey_result)
## $statistics
##       MSerror Df      Mean       CV
##   0.006378581 21 0.7055277 11.32004
## 
## $parameters
##    test           name.t ntr StudentizedRange alpha
##   Tukey condition_origin   6         4.424353  0.05
## 
## $means
##                     alpha        std r       Min       Max       Q25       Q50
## dapt-nhbe       0.4401954 0.08435288 3 0.3810741 0.5367927 0.3918968 0.4027194
## il13_high-nhbe  0.7456857 0.10254686 6 0.5503647 0.8478375 0.7451601 0.7692444
## il13_low-nhbe   0.7540470 0.06342009 6 0.6461721 0.8017064 0.7256237 0.7857632
## lonza-nhbe      0.7004960 0.02830200 3 0.6683451 0.7216454 0.6899212 0.7114973
## pneumacult-nhbe 0.7428324 0.08633512 7 0.5578255 0.8189732 0.7457429 0.7668465
## pneumacult-nhne 0.7144745 0.02655215 2 0.6956993 0.7332497 0.7050869 0.7144745
##                       Q75
## dapt-nhbe       0.4697561
## il13_high-nhbe  0.7925297
## il13_low-nhbe   0.7959157
## lonza-nhbe      0.7165714
## pneumacult-nhbe 0.7823480
## pneumacult-nhne 0.7238621
## 
## $comparison
## NULL
## 
## $groups
##                     alpha groups
## il13_low-nhbe   0.7540470      a
## il13_high-nhbe  0.7456857      a
## pneumacult-nhbe 0.7428324      a
## pneumacult-nhne 0.7144745      a
## lonza-nhbe      0.7004960      a
## dapt-nhbe       0.4401954      b
## 
## attr(,"class")
## [1] "group"
tukey_groups <- tukey_result$groups[order(rownames(tukey_result$groups)),]

#Great graph with the tukey groups annotated
alpha %>%
  filter(infection == "uninfected" & week == "3") %>%
  mutate(condition_origin = paste0(condition, "-", cell_origin),
         condition_origin = factor(condition_origin, levels = c("pneumacult-nhbe", "il13_low-nhbe", "il13_high-nhbe", "dapt-nhbe", "lonza-nhbe", "pneumacult-nhne", "il13_low-nhne", "il13_high-nhne", "dapt-nhne"))) %>%
  filter(condition_origin %in% c("pneumacult-nhbe", "il13_low-nhbe", "il13_high-nhbe", "dapt-nhbe","ifn-nhbe", "lonza-nhbe", "pneumacult-nhne")) %>%
  ggplot(aes(x=condition_origin,
             y = alpha,
             color = condition_origin)) +
    geom_boxplot() +
    geom_text(data = data.frame(),
              aes(x = rownames(tukey_groups), y = max(alpha$alpha) + .1, label = tukey_groups$groups),
              col = 'black',
              size = 10) +
    scale_color_manual(values = c("#70C1B3", "#004F2D", "#FFE066", "#247BA0", "#999999", "#70C1B3")) + 
    stat_summary(fun.data = give.n, geom = "text", position = position_dodge(width = 0.75), fun.y = median) +
    ggtitle("Simpson's alpha diversity index-by condition\nRounds 3-9, Week 3") +
    labs(x = "Condition",
         y = "Simpson's alpha diversity index",
         caption = "Letters indicate groups determined by Tukey posthoc test\nNumbers indicate # of replicates") +
    theme(panel.grid = element_blank(),
        panel.background = element_blank(),
        axis.line = element_line(colour = "black"),
        axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = "none")

Did an ANOVA to look for differences in alpha diversities among the treatment groups. Found that none of the alpha diversities across treatment groups were statistically significant from each other, except for NHBE cells treated with Pneumacult + DAPT.

Figure 1G

Shows PCA of uninfected phenotyping data. PCA was performed on the uninfected cell type frequencies and colored by treatment and cell origin (NHBE or NHNE).

#Get the uninfected week 3 data in own dataframe
uninfected_pca_wk3_dat <- uninfected_dat %>%
  filter(week == 3) %>%
  ungroup() %>%
  select(sample_id, condition, cell_type, cell_origin, avg_freq_of_alive) %>%
  tidyr::spread(cell_type, avg_freq_of_alive) %>%
  droplevels() %>%
  mutate(condition = factor(condition, levels = c("pneumacult", "il13_low", "il13_high", "dapt", "lonza"))) %>%
  tibble::column_to_rownames('sample_id')

#Run principle components analysis on a matrix of the above data
preinfection_pca_wk3 <- prcomp(as.matrix(uninfected_pca_wk3_dat[3:11]), scale = TRUE)

#Plot the PCA biplot
preinfection_pca_plot_wk3 <- fviz_pca_biplot(preinfection_pca_wk3,
                                             mean.point = FALSE,
                                             label = "var",
                                             title = "NH(B/N)E Preinfection phenotyping PCA-\nWeek 3 only")+
  geom_point(size = 3,aes(shape = uninfected_pca_wk3_dat$cell_origin, color = uninfected_pca_wk3_dat$condition)) +
  scale_color_manual(values = c("#70C1B3", "#004F2D", "#FFE066", "#247BA0", "#999999"))+
  guides(shape = guide_legend(title = "Cell origin"),
         colour = guide_legend(title = "Condition"))

preinfection_pca_plot_wk3

#Create plots of active variable contribution to the first two principal components
fviz_contrib(preinfection_pca_wk3, choice="var", axes = 1)

fviz_contrib(preinfection_pca_wk3, choice="var", axes = 2)

Figure 1H

MDS plot showing similarity in uninfected cell type frequencies between donors. 5 different donors were used. This MDS plot compares NHBE Pneumacult treatments only, since this treatment was performed for each round.

Donor information: * Rounds 3 and 4: Donor A * Round 5: Donor B * Rounds 6 and 7: Donor C * Round 8: Donor D * Round 9: Donor E

#Dataframe of donor info
donor_info <- data.frame(round = c("3", "4", "5", "6", "7", "8", "9"),
                         donor = c("A", "A", "B", "C", "C", "D", "E"))

#Calculate Bray-Curtis index for dissimilarity
nhbe_pneu_beta_dist <- vegdist(t(uninfected_dat %>%
                                   filter(cell_origin == "nhbe",
                                          moi == 0.5,
                                          condition == "pneumacult",
                                          week == "3") %>%
                                   ungroup() %>%
                                   select(sample_id, cell_type, avg_freq_of_alive) %>%
                                   tidyr::spread(sample_id, avg_freq_of_alive) %>%
                                   .[,-1]), index = "bray")

#Feed into an MDS function:
nhbe_pneu_mds <- metaMDS(nhbe_pneu_beta_dist)
## Run 0 stress 0.003160624 
## Run 1 stress 0.003210843 
## ... Procrustes: rmse 0.003670819  max resid 0.004473532 
## ... Similar to previous best
## Run 2 stress 0.003243644 
## ... Procrustes: rmse 0.006145051  max resid 0.007536311 
## Run 3 stress 0.003250694 
## ... Procrustes: rmse 0.006688302  max resid 0.008215421 
## Run 4 stress 0.08722371 
## Run 5 stress 0.1406759 
## Run 6 stress 0.05931879 
## Run 7 stress 0.0593188 
## Run 8 stress 0.08722371 
## Run 9 stress 0.003235935 
## ... Procrustes: rmse 0.0055554  max resid 0.00680137 
## Run 10 stress 0.1406759 
## Run 11 stress 0.003160697 
## ... Procrustes: rmse 3.872058e-05  max resid 7.12503e-05 
## ... Similar to previous best
## Run 12 stress 0.05931883 
## Run 13 stress 0.003160621 
## ... New best solution
## ... Procrustes: rmse 4.306163e-06  max resid 8.108033e-06 
## ... Similar to previous best
## Run 14 stress 0.003256923 
## ... Procrustes: rmse 0.007055628  max resid 0.008668112 
## Run 15 stress 0.1406759 
## Run 16 stress 0.003244011 
## ... Procrustes: rmse 0.006152001  max resid 0.007541593 
## Run 17 stress 0.003247572 
## ... Procrustes: rmse 0.006408561  max resid 0.007867237 
## Run 18 stress 0.003160689 
## ... Procrustes: rmse 1.272649e-05  max resid 1.76575e-05 
## ... Similar to previous best
## Run 19 stress 0.1406759 
## Run 20 stress 0.003160659 
## ... Procrustes: rmse 2.698592e-05  max resid 5.087215e-05 
## ... Similar to previous best
## *** Best solution repeated 3 times
nhbe_pneu_mds_data <- as.data.frame(nhbe_pneu_mds$points)

nhbe_pneu_graph_data <- merge(nhbe_pneu_mds_data, 
                              alpha %>%
                                filter(infection == "uninfected") %>%
                                tibble::column_to_rownames('Row.names'),
                              by = "row.names") %>%
  mutate(round = factor(round),
         week = factor(week)) %>%
  merge(., donor_info, by = "round")

ggplot(nhbe_pneu_graph_data, aes(x = MDS1, y = MDS2, color = donor)) +
  geom_point(size = 3) +
  scale_shape_manual(values = c(3, 19, 17, 15)) +
  ggtitle("Preinfection phenotyping cell type diversity MDS-\nNHBE pneumacult, week 3 only") +
  theme_bw() +
  theme(panel.grid = element_blank()) +
  labs(color = "Donor")

Figure 2

Figure 2A

Shows UMAP of flow cytometry data of infected cells. Flow cytometry channel value data from round 7 Pneumacult treated cells was used to generate UMAPs displaying the manually gated populations and marker staining. The UMAP is a combination of uninfected cells (n=3 wells) and infected cells (n=3 wells). Figure 2A is generated using UMAP colored by HA or NP staining, but only the infected UMAP with staining is displayed.

ggplot(data = cell.sub,
       aes(x = UMAP_X, y = UMAP_Y, color = `Cal NP`, order = orderrank_np)) +
  geom_point() +
  #scale_color_gradientn(colors = c("white","#C6DBEF", "#4292C6", "#08306B")) +
  scale_color_distiller(palette = "RdYlBu", direction = -1) +
  facet_grid(.~condition) +
  ggtitle("Round 7 Pneumacult UMAP\nFlow phenotyping by Cal NP staining") +
  theme_bw() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank()) +
  coord_fixed()

ggplot(data = cell.sub,
       aes(x = UMAP_X, y = UMAP_Y, color = `Cal HA`, order = (orderrank_ha))) +
  geom_point() +
  scale_color_distiller(palette = "RdYlBu", direction = -1) +
  facet_grid(.~condition) +
  ggtitle("Round 7 Pneumacult UMAP\nFlow phenotyping by Cal HA staining") +
  theme_bw() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank()) +
  coord_fixed()

Figure 2B

Shows alluvial plots of pre and post infection phenotyping data, as well as the overall percentage of alive cells that are IAV+.

This dataset is generated from the average cell type frequencies and coerced into alluvial format. The code below will generate an alluvial plot for each of the rounds/conditions that were infected with Cal/07/09. This includes:

Cells differentiated for 3 weeks: * Pneumacult (PC) treated NHBE cells, infected at MOI=0.5 (rounds 3, 4, 7, 9) * PC + IL-13 low treated NHBE cells, infected at MOI=0.5 (rounds 3, 4, 7, 9) * PC + IL-13 high treated NHBE cells, infected at MOI=0.5 (rounds 3, 4, 7, 9) * PC + DAPT treated NHBE cells, infected at MOI=0.5 (rounds 7 and 9) * Lonza treated NHBE cells, infected at MOI=0.5 (rounds 7 and 9) * PC treated NHNE cells, infected at MOI=0.5 (round 7 only) * PC treated NHBE cells, infected at MOI=1 (round 3 only)

Cells differentiated for 0 weeks: * PC treated NHBE cells, infected at MOI=0.5 (round 7 only)

alluvial_dat <- read_excel("../data/nhbe_alluvial_plot_data.xlsx", range = cell_cols("A:G"), na = "NA") %>%
  mutate(cell_type = factor(cell_type, levels = c("Ciliated", "Secretory", "Basal", "Triple Negative", "CD66c+ Other", "CD66c- Other", "Goblet", "CD271+ Ciliated", "CD66c+ Basal")),
         stratum = factor(stratum, levels = c("Ciliated", "Secretory", "Basal", "Triple Negative", "CD66c+ Other", "CD66c- Other", "Goblet", "CD271+ Ciliated", "CD66c+ Basal", "negative", "positive")),
         measurement = as.factor(measurement),
         measurement = fct_recode(measurement, "Preinfection" = "preinfection_frequency", "Postinfection" = "postinfection_frequency"),
         measurement = factor(measurement, levels = c("Preinfection", "Postinfection", "IAV+")),
         experiment = as.factor(experiment))

#Function to make alluvial plots for each of the sets of data
make_alluvial_plot <- function(exp, MOI) {
print(ggplot(data = alluvial_dat %>%
                 filter(experiment==exp & moi==MOI),
     aes(x = measurement,
         y=freq,
         stratum = stratum, 
         alluvium = alluvium, 
         label = stratum)) +
geom_alluvium(aes(fill = stratum),color = "gray", na.rm=TRUE) +
geom_stratum(aes(fill=stratum), na.rm=TRUE) +
scale_fill_manual(values = c("#1080FF", "#8000FF", "#FB02FF", "#FD8008", "#FECC66", "#FC6FCF", "#FFFF0A", "#FFFFFF", "#108080", "lightgray", "darkgray")) +
ggtitle(paste0("nHTBE ",exp," alluvial\nMOI=",MOI)) +
theme_bw() +
theme(panel.grid = element_blank()))
  }

combos <- alluvial_dat %>%
  distinct(experiment, moi)

for (x in (1:nrow(combos))) {
  make_alluvial_plot(combos$experiment[x], combos$moi[x])
}
## Warning in setup_data(...): Some differentiation aesthetics vary within alluvia, and will be diffused by their first value.
## Consider using `geom_flow()` instead.

## Warning in setup_data(...): Some differentiation aesthetics vary within alluvia, and will be diffused by their first value.
## Consider using `geom_flow()` instead.

## Warning in setup_data(...): Some differentiation aesthetics vary within alluvia, and will be diffused by their first value.
## Consider using `geom_flow()` instead.

## Warning in setup_data(...): Some differentiation aesthetics vary within alluvia, and will be diffused by their first value.
## Consider using `geom_flow()` instead.

## Warning in setup_data(...): Some differentiation aesthetics vary within alluvia, and will be diffused by their first value.
## Consider using `geom_flow()` instead.

## Warning in setup_data(...): Some differentiation aesthetics vary within alluvia, and will be diffused by their first value.
## Consider using `geom_flow()` instead.

## Warning in setup_data(...): Some differentiation aesthetics vary within alluvia, and will be diffused by their first value.
## Consider using `geom_flow()` instead.

## Warning in setup_data(...): Some differentiation aesthetics vary within alluvia, and will be diffused by their first value.
## Consider using `geom_flow()` instead.

## Warning in setup_data(...): Some differentiation aesthetics vary within alluvia, and will be diffused by their first value.
## Consider using `geom_flow()` instead.

## Warning in setup_data(...): Some differentiation aesthetics vary within alluvia, and will be diffused by their first value.
## Consider using `geom_flow()` instead.

## Warning in setup_data(...): Some differentiation aesthetics vary within alluvia, and will be diffused by their first value.
## Consider using `geom_flow()` instead.

## Warning in setup_data(...): Some differentiation aesthetics vary within alluvia, and will be diffused by their first value.
## Consider using `geom_flow()` instead.

## Warning in setup_data(...): Some differentiation aesthetics vary within alluvia, and will be diffused by their first value.
## Consider using `geom_flow()` instead.

## Warning in setup_data(...): Some differentiation aesthetics vary within alluvia, and will be diffused by their first value.
## Consider using `geom_flow()` instead.

## Warning in setup_data(...): Some differentiation aesthetics vary within alluvia, and will be diffused by their first value.
## Consider using `geom_flow()` instead.

## Warning in setup_data(...): Some differentiation aesthetics vary within alluvia, and will be diffused by their first value.
## Consider using `geom_flow()` instead.

## Warning in setup_data(...): Some differentiation aesthetics vary within alluvia, and will be diffused by their first value.
## Consider using `geom_flow()` instead.

## Warning in setup_data(...): Some differentiation aesthetics vary within alluvia, and will be diffused by their first value.
## Consider using `geom_flow()` instead.

## Warning in setup_data(...): Some differentiation aesthetics vary within alluvia, and will be diffused by their first value.
## Consider using `geom_flow()` instead.

Figure 2C

Shows bar plot of cell types present within IAV+ cells, comparing MOI of 0.5 to 1 of Pneumacult treated cells.

The phenotyping data for MOI=0.5 is a combination of w wells (replicates), while the MOI=1 had a single well.

phenotyping_dat %>%
  filter(round == "3" & infection == "infected" & condition == "pneumacult") %>%
  mutate(moi = factor(moi, levels = c("0.5", "1"))) %>%
  droplevels() %>%
  ggplot(aes(x = moi, y = avg_freq_of_alive, fill = cell_type)) +
    geom_bar(stat = "identity", color = "black") +
    geom_errorbar(aes(ymin = SDpos-sd, ymax = SDpos+sd), width = 0.2, position = "identity") + 
    scale_fill_manual(values = colors) +
    scale_y_continuous(limits = c(0,100), expand = c(0,0)) +
    ggtitle("NHBE post-infection IAV+ cells\nRound 3") +
    labs(x = "MOI", y = "% of IAV+ cells") +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1),
          panel.grid = element_blank())

Figure 2D

Graph files available on request (generated in Prism). Code for running the ANOVA with Tukey HSD posthoc test is listed below using log gMFI values from a differentiation of NHBE cells (N=3 wells) and 2 wells of A549 cells.

sialic_dat <- read.csv("../data/sialic_acid_log_gmfi_values.csv", header = TRUE, stringsAsFactors = TRUE)

#ANOVA for alpha 2,3 sialic acid data
a2_3_anova <- aov(gMFI ~ cell_type, data = subset(sialic_dat, sialic_acid_type=="a2_3"))
summary(a2_3_anova)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## cell_type    9 1.0667  0.1185   168.4 3.46e-16 ***
## Residuals   19 0.0134  0.0007                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Tukey posthoc test
a2_3_tukey_result <- HSD.test(a2_3_anova, "cell_type", group = TRUE)
print(a2_3_tukey_result)
## $statistics
##        MSerror Df     Mean        CV
##   0.0007039806 19 3.997466 0.6637363
## 
## $parameters
##    test    name.t ntr StudentizedRange alpha
##   Tukey cell_type  10         5.037459  0.05
## 
## $means
##                     gMFI         std r      Min      Max      Q25      Q50
## A549            4.383900 0.026939196 2 4.364851 4.402949 4.374375 4.383900
## basal           3.937709 0.027658509 3 3.918869 3.969463 3.921832 3.924796
## CD271+ ciliated 4.326459 0.015521521 3 4.309524 4.340008 4.319684 4.329845
## CD66c- other    3.793874 0.003596293 3 3.789722 3.796019 3.792801 3.795880
## CD66c+ basal    4.076163 0.013379922 3 4.061867 4.088384 4.070053 4.078239
## CD66c+ other    3.864606 0.017060550 3 3.845098 3.876737 3.858540 3.871981
## ciliated        4.142304 0.011764348 3 4.130302 4.153815 4.136549 4.142796
## goblet          3.879580 0.062932970 3 3.808211 3.927114 3.855813 3.903416
## secretory       3.839061 0.013709037 3 3.826334 3.853577 3.831803 3.837273
## triple negative 3.859816 0.023433533 3 3.837146 3.883945 3.847752 3.858357
##                      Q75
## A549            4.393424
## basal           3.947129
## CD271+ ciliated 4.334926
## CD66c- other    3.795949
## CD66c+ basal    4.083312
## CD66c+ other    3.874359
## ciliated        4.148305
## goblet          3.915265
## secretory       3.845425
## triple negative 3.871151
## 
## $comparison
## NULL
## 
## $groups
##                     gMFI groups
## A549            4.383900      a
## CD271+ ciliated 4.326459      a
## ciliated        4.142304      b
## CD66c+ basal    4.076163      b
## basal           3.937709      c
## goblet          3.879580     cd
## CD66c+ other    3.864606    cde
## triple negative 3.859816     de
## secretory       3.839061     de
## CD66c- other    3.793874      e
## 
## attr(,"class")
## [1] "group"
#ANOVA for alpha 2,6 sialic acid data
a2_6_anova <- aov(gMFI ~ cell_type, data = subset(sialic_dat, sialic_acid_type=="a2_6"))
summary(a2_6_anova)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## cell_type    9 1.7412 0.19347   50.77 2.14e-11 ***
## Residuals   19 0.0724 0.00381                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Tukey posthoc test
a2_6_tukey_result <- HSD.test(a2_6_anova, "cell_type", group = TRUE)
print(a2_6_tukey_result)
## $statistics
##       MSerror Df     Mean       CV
##   0.003810421 19 4.572376 1.350033
## 
## $parameters
##    test    name.t ntr StudentizedRange alpha
##   Tukey cell_type  10         5.037459  0.05
## 
## $means
##                     gMFI         std r      Min      Max      Q25      Q50
## A549            4.425764 0.003306624 2 4.423426 4.428102 4.424595 4.425764
## basal           4.486685 0.058440877 3 4.428944 4.545802 4.457127 4.485310
## CD271+ ciliated 5.050441 0.038302416 3 5.006286 5.074721 5.038301 5.070315
## CD66c- other    4.305451 0.045401364 3 4.262261 4.352780 4.281787 4.301312
## CD66c+ basal    4.795549 0.063828195 3 4.728289 4.855277 4.765685 4.803081
## CD66c+ other    4.510580 0.059548293 3 4.449725 4.568730 4.481505 4.513284
## ciliated        4.792961 0.027776811 3 4.761048 4.811696 4.783594 4.806139
## goblet          4.691511 0.114444762 3 4.596960 4.818740 4.627896 4.658831
## secretory       4.344361 0.056128094 3 4.295611 4.405722 4.313681 4.331751
## triple negative 4.271588 0.067899070 3 4.221414 4.348850 4.232957 4.244500
##                      Q75
## A549            4.426933
## basal           4.515556
## CD271+ ciliated 5.072518
## CD66c- other    4.327046
## CD66c+ basal    4.829179
## CD66c+ other    4.541007
## ciliated        4.808917
## goblet          4.738786
## secretory       4.368737
## triple negative 4.296675
## 
## $comparison
## NULL
## 
## $groups
##                     gMFI groups
## CD271+ ciliated 5.050441      a
## CD66c+ basal    4.795549      b
## ciliated        4.792961      b
## goblet          4.691511      b
## CD66c+ other    4.510580      c
## basal           4.486685      c
## A549            4.425764     cd
## secretory       4.344361     cd
## CD66c- other    4.305451      d
## triple negative 4.271588      d
## 
## attr(,"class")
## [1] "group"

Figure 2E

Data and graph files available on request (generated in Prism).

Figure 2F

Shows week 0 alluvial plot. A plot for this experiment is generated in the figure 2B code chunk, but is reproduced here as well.

ggplot(data = subset(alluvial_dat, experiment == "7_pneumacult_week0"),
     aes(x = measurement,
         y=freq,
         stratum = stratum, 
         alluvium = alluvium, 
         label = stratum)) +
  geom_alluvium(aes(fill = stratum),color = "gray", na.rm=TRUE) +
  geom_stratum(aes(fill=stratum), na.rm=TRUE) +
  scale_fill_manual(values = c("#1080FF", "#8000FF", "#FB02FF", "#FD8008", "#FECC66", "#FC6FCF", "#FFFF0A", "#FFFFFF", "#108080", "lightgray", "darkgray")) +
  ggtitle("Pneumacult week 0 post-ALI alluvial plot") +
  theme_bw() +
  theme(panel.grid = element_blank())
## Warning in setup_data(...): Some differentiation aesthetics vary within alluvia, and will be diffused by their first value.
## Consider using `geom_flow()` instead.

Figure 3

Figure 3A

Data and graph files available upon request (generated in Prism)

Figure 3B

Data and graph files available upon request (generated in Prism)

Figure 3C

Violin plots of HA and NP fluorescence intensity (FI). Channel values of fluorescence intensity values for HA and NP stains were exported from FlowJo for each gated population. The code below will produce violin plots in Figure 3C.

Each violin combines events from three wells.

meta_3c <- read.csv("../data/figure_3c_metadata.csv", header = TRUE)

#Get file info, we need to not read in empty files
list.of.files <- file.info(meta_3c$path)
# get the size for each file
sizes <- file.info(meta_3c$path)$size
#List non-empty files
list.of.non.empty.files <- rownames(list.of.files)[which(sizes > 6)]

fi_dat <- readr::read_csv(list.of.non.empty.files, id = "path", col_names = FALSE) %>%
  rename(fi = X1)
## Rows: 49391 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (1): X1
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
fi_dat <- as.data.frame(fi_dat)

fi_dat <- left_join(fi_dat, meta_3c, by = "path") %>%
  mutate(round = factor(round),
         cell_origin = factor(cell_origin),
         treatment = factor(treatment, levels = c("Pneumacult", "PC_IL13_low", "PC_IL13_high", "Lonza", "PC_DAPT")),
         origin_treatment = paste0(cell_origin, "_", treatment),
         origin_treatment = factor(origin_treatment, levels = c("nhbe_Pneumacult", "nhbe_PC_IL13_low", "nhbe_PC_IL13_high", "nhbe_PC_DAPT", "nhbe_Lonza", "nhne_Pneumacult")),
         week == factor(week),
         cell_type = factor(cell_type, levels = c("Ciliated", "Secretory", "Basal", "Triple Negative", "CD66c+ Other", "CD66c- Other", "Goblet", "CD271+ Ciliated", "CD66c+ Basal")),
         HA_or_NP = factor(HA_or_NP),
         logFI = log10(fi))

Graph in paper of round 7 data:

ggplot(fi_dat, aes(x = cell_type, y = logFI, color = cell_type)) +
  geom_violin(scale = "count", draw_quantiles = c(0.25, 0.5, 0.75)) +
  facet_grid(origin_treatment~HA_or_NP) +
  scale_color_manual(values = colors) +
  scale_fill_manual(values = colors) + 
  scale_y_continuous(limits = c(0,6)) +
  ggtitle(paste0("Round 7 Week 3 log10 FI plot\nMOI 0.5")) +
  labs(x = "Cell type",
       y = "Log10 FI",
       color = "Cell type") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        strip.text = element_text(size = 10))
## Warning: Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.

Mean FI values were compared within a treatment condition using an ANOVA and Tukey posthoc statistical test to see whether any cell types have higher virus burden within the infected cells. Differences in FI within a cell type between treatment conditions were not statistically compared.

#Function to perform anova. Requires FI dataframe, the round
run.anova <- function(df, round, week, origin_treatment, HA_or_NP) {
  results <- aov(logFI ~ cell_type, data = subset(df, round == round & week == week & origin_treatment == origin_treatment & HA_or_NP == HA_or_NP))
  print(paste0("ANOVA of round ", round,", week ", week, ", ", origin_treatment, " and ", HA_or_NP, " logFI between cell types"))
  print(summary(results))
  
  print(HSD.test(results, "cell_type", group = TRUE))
}

combos <- fi_dat %>%
  distinct(round, week, HA_or_NP, origin_treatment)

for (x in (1:nrow(combos))) {
  run.anova(fi_dat, combos$round[x], combos$week[x], combos$origin_treatment[x], combos$HA_or_NP[x])
}
## [1] "ANOVA of round 7, week 3, nhbe_Lonza and HA logFI between cell types"
##                Df Sum Sq Mean Sq F value Pr(>F)    
## cell_type       8    631   78.89   366.5 <2e-16 ***
## Residuals   49382  10631    0.22                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## $statistics
##     MSerror    Df     Mean       CV
##   0.2152815 49382 3.651051 12.70824
## 
## $parameters
##    test    name.t ntr StudentizedRange alpha
##   Tukey cell_type   9         4.386509  0.05
## 
## $means
##                    logFI       std     r      Min      Max      Q25      Q50
## Basal           3.338597 0.5385615   499 2.551993 4.704409 2.877252 3.176311
## CD271+ Ciliated 3.832046 0.3838757  5555 2.552025 4.900796 3.616941 3.858975
## CD66c- Other    3.721810 0.5003153   420 2.593093 5.077571 3.382227 3.801166
## CD66c+ Basal    3.737218 0.5572598   432 2.591476 4.852589 3.324435 3.816204
## CD66c+ Other    3.612335 0.5623695  3233 2.445936 5.048178 3.116385 3.674303
## Ciliated        3.687258 0.4280634 25634 2.370524 5.031748 3.438439 3.734646
## Goblet          3.502913 0.4375353   236 2.563614 4.862660 3.208079 3.576817
## Secretory       3.563014 0.5351201 11766 2.420612 5.009366 3.108390 3.582073
## Triple Negative 3.249685 0.4283957  1616 2.478223 4.744064 2.928697 3.220904
##                      Q75
## Basal           3.823677
## CD271+ Ciliated 4.096585
## CD66c- Other    4.096538
## CD66c+ Basal    4.174894
## CD66c+ Other    4.064042
## Ciliated        3.978415
## Goblet          3.752905
## Secretory       3.989544
## Triple Negative 3.522732
## 
## $comparison
## NULL
## 
## $groups
##                    logFI groups
## CD271+ Ciliated 3.832046      a
## CD66c+ Basal    3.737218      b
## CD66c- Other    3.721810      b
## Ciliated        3.687258      b
## CD66c+ Other    3.612335      c
## Secretory       3.563014      d
## Goblet          3.502913      d
## Basal           3.338597      e
## Triple Negative 3.249685      f
## 
## attr(,"class")
## [1] "group"
## [1] "ANOVA of round 7, week 3, nhbe_PC_DAPT and HA logFI between cell types"
##                Df Sum Sq Mean Sq F value Pr(>F)    
## cell_type       8    631   78.89   366.5 <2e-16 ***
## Residuals   49382  10631    0.22                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## $statistics
##     MSerror    Df     Mean       CV
##   0.2152815 49382 3.651051 12.70824
## 
## $parameters
##    test    name.t ntr StudentizedRange alpha
##   Tukey cell_type   9         4.386509  0.05
## 
## $means
##                    logFI       std     r      Min      Max      Q25      Q50
## Basal           3.338597 0.5385615   499 2.551993 4.704409 2.877252 3.176311
## CD271+ Ciliated 3.832046 0.3838757  5555 2.552025 4.900796 3.616941 3.858975
## CD66c- Other    3.721810 0.5003153   420 2.593093 5.077571 3.382227 3.801166
## CD66c+ Basal    3.737218 0.5572598   432 2.591476 4.852589 3.324435 3.816204
## CD66c+ Other    3.612335 0.5623695  3233 2.445936 5.048178 3.116385 3.674303
## Ciliated        3.687258 0.4280634 25634 2.370524 5.031748 3.438439 3.734646
## Goblet          3.502913 0.4375353   236 2.563614 4.862660 3.208079 3.576817
## Secretory       3.563014 0.5351201 11766 2.420612 5.009366 3.108390 3.582073
## Triple Negative 3.249685 0.4283957  1616 2.478223 4.744064 2.928697 3.220904
##                      Q75
## Basal           3.823677
## CD271+ Ciliated 4.096585
## CD66c- Other    4.096538
## CD66c+ Basal    4.174894
## CD66c+ Other    4.064042
## Ciliated        3.978415
## Goblet          3.752905
## Secretory       3.989544
## Triple Negative 3.522732
## 
## $comparison
## NULL
## 
## $groups
##                    logFI groups
## CD271+ Ciliated 3.832046      a
## CD66c+ Basal    3.737218      b
## CD66c- Other    3.721810      b
## Ciliated        3.687258      b
## CD66c+ Other    3.612335      c
## Secretory       3.563014      d
## Goblet          3.502913      d
## Basal           3.338597      e
## Triple Negative 3.249685      f
## 
## attr(,"class")
## [1] "group"
## [1] "ANOVA of round 7, week 3, nhbe_Pneumacult and HA logFI between cell types"
##                Df Sum Sq Mean Sq F value Pr(>F)    
## cell_type       8    631   78.89   366.5 <2e-16 ***
## Residuals   49382  10631    0.22                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## $statistics
##     MSerror    Df     Mean       CV
##   0.2152815 49382 3.651051 12.70824
## 
## $parameters
##    test    name.t ntr StudentizedRange alpha
##   Tukey cell_type   9         4.386509  0.05
## 
## $means
##                    logFI       std     r      Min      Max      Q25      Q50
## Basal           3.338597 0.5385615   499 2.551993 4.704409 2.877252 3.176311
## CD271+ Ciliated 3.832046 0.3838757  5555 2.552025 4.900796 3.616941 3.858975
## CD66c- Other    3.721810 0.5003153   420 2.593093 5.077571 3.382227 3.801166
## CD66c+ Basal    3.737218 0.5572598   432 2.591476 4.852589 3.324435 3.816204
## CD66c+ Other    3.612335 0.5623695  3233 2.445936 5.048178 3.116385 3.674303
## Ciliated        3.687258 0.4280634 25634 2.370524 5.031748 3.438439 3.734646
## Goblet          3.502913 0.4375353   236 2.563614 4.862660 3.208079 3.576817
## Secretory       3.563014 0.5351201 11766 2.420612 5.009366 3.108390 3.582073
## Triple Negative 3.249685 0.4283957  1616 2.478223 4.744064 2.928697 3.220904
##                      Q75
## Basal           3.823677
## CD271+ Ciliated 4.096585
## CD66c- Other    4.096538
## CD66c+ Basal    4.174894
## CD66c+ Other    4.064042
## Ciliated        3.978415
## Goblet          3.752905
## Secretory       3.989544
## Triple Negative 3.522732
## 
## $comparison
## NULL
## 
## $groups
##                    logFI groups
## CD271+ Ciliated 3.832046      a
## CD66c+ Basal    3.737218      b
## CD66c- Other    3.721810      b
## Ciliated        3.687258      b
## CD66c+ Other    3.612335      c
## Secretory       3.563014      d
## Goblet          3.502913      d
## Basal           3.338597      e
## Triple Negative 3.249685      f
## 
## attr(,"class")
## [1] "group"
## [1] "ANOVA of round 7, week 3, nhbe_PC_IL13_low and HA logFI between cell types"
##                Df Sum Sq Mean Sq F value Pr(>F)    
## cell_type       8    631   78.89   366.5 <2e-16 ***
## Residuals   49382  10631    0.22                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## $statistics
##     MSerror    Df     Mean       CV
##   0.2152815 49382 3.651051 12.70824
## 
## $parameters
##    test    name.t ntr StudentizedRange alpha
##   Tukey cell_type   9         4.386509  0.05
## 
## $means
##                    logFI       std     r      Min      Max      Q25      Q50
## Basal           3.338597 0.5385615   499 2.551993 4.704409 2.877252 3.176311
## CD271+ Ciliated 3.832046 0.3838757  5555 2.552025 4.900796 3.616941 3.858975
## CD66c- Other    3.721810 0.5003153   420 2.593093 5.077571 3.382227 3.801166
## CD66c+ Basal    3.737218 0.5572598   432 2.591476 4.852589 3.324435 3.816204
## CD66c+ Other    3.612335 0.5623695  3233 2.445936 5.048178 3.116385 3.674303
## Ciliated        3.687258 0.4280634 25634 2.370524 5.031748 3.438439 3.734646
## Goblet          3.502913 0.4375353   236 2.563614 4.862660 3.208079 3.576817
## Secretory       3.563014 0.5351201 11766 2.420612 5.009366 3.108390 3.582073
## Triple Negative 3.249685 0.4283957  1616 2.478223 4.744064 2.928697 3.220904
##                      Q75
## Basal           3.823677
## CD271+ Ciliated 4.096585
## CD66c- Other    4.096538
## CD66c+ Basal    4.174894
## CD66c+ Other    4.064042
## Ciliated        3.978415
## Goblet          3.752905
## Secretory       3.989544
## Triple Negative 3.522732
## 
## $comparison
## NULL
## 
## $groups
##                    logFI groups
## CD271+ Ciliated 3.832046      a
## CD66c+ Basal    3.737218      b
## CD66c- Other    3.721810      b
## Ciliated        3.687258      b
## CD66c+ Other    3.612335      c
## Secretory       3.563014      d
## Goblet          3.502913      d
## Basal           3.338597      e
## Triple Negative 3.249685      f
## 
## attr(,"class")
## [1] "group"
## [1] "ANOVA of round 7, week 3, nhbe_PC_IL13_high and HA logFI between cell types"
##                Df Sum Sq Mean Sq F value Pr(>F)    
## cell_type       8    631   78.89   366.5 <2e-16 ***
## Residuals   49382  10631    0.22                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## $statistics
##     MSerror    Df     Mean       CV
##   0.2152815 49382 3.651051 12.70824
## 
## $parameters
##    test    name.t ntr StudentizedRange alpha
##   Tukey cell_type   9         4.386509  0.05
## 
## $means
##                    logFI       std     r      Min      Max      Q25      Q50
## Basal           3.338597 0.5385615   499 2.551993 4.704409 2.877252 3.176311
## CD271+ Ciliated 3.832046 0.3838757  5555 2.552025 4.900796 3.616941 3.858975
## CD66c- Other    3.721810 0.5003153   420 2.593093 5.077571 3.382227 3.801166
## CD66c+ Basal    3.737218 0.5572598   432 2.591476 4.852589 3.324435 3.816204
## CD66c+ Other    3.612335 0.5623695  3233 2.445936 5.048178 3.116385 3.674303
## Ciliated        3.687258 0.4280634 25634 2.370524 5.031748 3.438439 3.734646
## Goblet          3.502913 0.4375353   236 2.563614 4.862660 3.208079 3.576817
## Secretory       3.563014 0.5351201 11766 2.420612 5.009366 3.108390 3.582073
## Triple Negative 3.249685 0.4283957  1616 2.478223 4.744064 2.928697 3.220904
##                      Q75
## Basal           3.823677
## CD271+ Ciliated 4.096585
## CD66c- Other    4.096538
## CD66c+ Basal    4.174894
## CD66c+ Other    4.064042
## Ciliated        3.978415
## Goblet          3.752905
## Secretory       3.989544
## Triple Negative 3.522732
## 
## $comparison
## NULL
## 
## $groups
##                    logFI groups
## CD271+ Ciliated 3.832046      a
## CD66c+ Basal    3.737218      b
## CD66c- Other    3.721810      b
## Ciliated        3.687258      b
## CD66c+ Other    3.612335      c
## Secretory       3.563014      d
## Goblet          3.502913      d
## Basal           3.338597      e
## Triple Negative 3.249685      f
## 
## attr(,"class")
## [1] "group"
## [1] "ANOVA of round 7, week 3, nhbe_Lonza and NP logFI between cell types"
##                Df Sum Sq Mean Sq F value Pr(>F)    
## cell_type       8    631   78.89   366.5 <2e-16 ***
## Residuals   49382  10631    0.22                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## $statistics
##     MSerror    Df     Mean       CV
##   0.2152815 49382 3.651051 12.70824
## 
## $parameters
##    test    name.t ntr StudentizedRange alpha
##   Tukey cell_type   9         4.386509  0.05
## 
## $means
##                    logFI       std     r      Min      Max      Q25      Q50
## Basal           3.338597 0.5385615   499 2.551993 4.704409 2.877252 3.176311
## CD271+ Ciliated 3.832046 0.3838757  5555 2.552025 4.900796 3.616941 3.858975
## CD66c- Other    3.721810 0.5003153   420 2.593093 5.077571 3.382227 3.801166
## CD66c+ Basal    3.737218 0.5572598   432 2.591476 4.852589 3.324435 3.816204
## CD66c+ Other    3.612335 0.5623695  3233 2.445936 5.048178 3.116385 3.674303
## Ciliated        3.687258 0.4280634 25634 2.370524 5.031748 3.438439 3.734646
## Goblet          3.502913 0.4375353   236 2.563614 4.862660 3.208079 3.576817
## Secretory       3.563014 0.5351201 11766 2.420612 5.009366 3.108390 3.582073
## Triple Negative 3.249685 0.4283957  1616 2.478223 4.744064 2.928697 3.220904
##                      Q75
## Basal           3.823677
## CD271+ Ciliated 4.096585
## CD66c- Other    4.096538
## CD66c+ Basal    4.174894
## CD66c+ Other    4.064042
## Ciliated        3.978415
## Goblet          3.752905
## Secretory       3.989544
## Triple Negative 3.522732
## 
## $comparison
## NULL
## 
## $groups
##                    logFI groups
## CD271+ Ciliated 3.832046      a
## CD66c+ Basal    3.737218      b
## CD66c- Other    3.721810      b
## Ciliated        3.687258      b
## CD66c+ Other    3.612335      c
## Secretory       3.563014      d
## Goblet          3.502913      d
## Basal           3.338597      e
## Triple Negative 3.249685      f
## 
## attr(,"class")
## [1] "group"
## [1] "ANOVA of round 7, week 3, nhbe_PC_IL13_low and NP logFI between cell types"
##                Df Sum Sq Mean Sq F value Pr(>F)    
## cell_type       8    631   78.89   366.5 <2e-16 ***
## Residuals   49382  10631    0.22                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## $statistics
##     MSerror    Df     Mean       CV
##   0.2152815 49382 3.651051 12.70824
## 
## $parameters
##    test    name.t ntr StudentizedRange alpha
##   Tukey cell_type   9         4.386509  0.05
## 
## $means
##                    logFI       std     r      Min      Max      Q25      Q50
## Basal           3.338597 0.5385615   499 2.551993 4.704409 2.877252 3.176311
## CD271+ Ciliated 3.832046 0.3838757  5555 2.552025 4.900796 3.616941 3.858975
## CD66c- Other    3.721810 0.5003153   420 2.593093 5.077571 3.382227 3.801166
## CD66c+ Basal    3.737218 0.5572598   432 2.591476 4.852589 3.324435 3.816204
## CD66c+ Other    3.612335 0.5623695  3233 2.445936 5.048178 3.116385 3.674303
## Ciliated        3.687258 0.4280634 25634 2.370524 5.031748 3.438439 3.734646
## Goblet          3.502913 0.4375353   236 2.563614 4.862660 3.208079 3.576817
## Secretory       3.563014 0.5351201 11766 2.420612 5.009366 3.108390 3.582073
## Triple Negative 3.249685 0.4283957  1616 2.478223 4.744064 2.928697 3.220904
##                      Q75
## Basal           3.823677
## CD271+ Ciliated 4.096585
## CD66c- Other    4.096538
## CD66c+ Basal    4.174894
## CD66c+ Other    4.064042
## Ciliated        3.978415
## Goblet          3.752905
## Secretory       3.989544
## Triple Negative 3.522732
## 
## $comparison
## NULL
## 
## $groups
##                    logFI groups
## CD271+ Ciliated 3.832046      a
## CD66c+ Basal    3.737218      b
## CD66c- Other    3.721810      b
## Ciliated        3.687258      b
## CD66c+ Other    3.612335      c
## Secretory       3.563014      d
## Goblet          3.502913      d
## Basal           3.338597      e
## Triple Negative 3.249685      f
## 
## attr(,"class")
## [1] "group"
## [1] "ANOVA of round 7, week 3, nhbe_PC_IL13_high and NP logFI between cell types"
##                Df Sum Sq Mean Sq F value Pr(>F)    
## cell_type       8    631   78.89   366.5 <2e-16 ***
## Residuals   49382  10631    0.22                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## $statistics
##     MSerror    Df     Mean       CV
##   0.2152815 49382 3.651051 12.70824
## 
## $parameters
##    test    name.t ntr StudentizedRange alpha
##   Tukey cell_type   9         4.386509  0.05
## 
## $means
##                    logFI       std     r      Min      Max      Q25      Q50
## Basal           3.338597 0.5385615   499 2.551993 4.704409 2.877252 3.176311
## CD271+ Ciliated 3.832046 0.3838757  5555 2.552025 4.900796 3.616941 3.858975
## CD66c- Other    3.721810 0.5003153   420 2.593093 5.077571 3.382227 3.801166
## CD66c+ Basal    3.737218 0.5572598   432 2.591476 4.852589 3.324435 3.816204
## CD66c+ Other    3.612335 0.5623695  3233 2.445936 5.048178 3.116385 3.674303
## Ciliated        3.687258 0.4280634 25634 2.370524 5.031748 3.438439 3.734646
## Goblet          3.502913 0.4375353   236 2.563614 4.862660 3.208079 3.576817
## Secretory       3.563014 0.5351201 11766 2.420612 5.009366 3.108390 3.582073
## Triple Negative 3.249685 0.4283957  1616 2.478223 4.744064 2.928697 3.220904
##                      Q75
## Basal           3.823677
## CD271+ Ciliated 4.096585
## CD66c- Other    4.096538
## CD66c+ Basal    4.174894
## CD66c+ Other    4.064042
## Ciliated        3.978415
## Goblet          3.752905
## Secretory       3.989544
## Triple Negative 3.522732
## 
## $comparison
## NULL
## 
## $groups
##                    logFI groups
## CD271+ Ciliated 3.832046      a
## CD66c+ Basal    3.737218      b
## CD66c- Other    3.721810      b
## Ciliated        3.687258      b
## CD66c+ Other    3.612335      c
## Secretory       3.563014      d
## Goblet          3.502913      d
## Basal           3.338597      e
## Triple Negative 3.249685      f
## 
## attr(,"class")
## [1] "group"
## [1] "ANOVA of round 7, week 3, nhbe_PC_DAPT and NP logFI between cell types"
##                Df Sum Sq Mean Sq F value Pr(>F)    
## cell_type       8    631   78.89   366.5 <2e-16 ***
## Residuals   49382  10631    0.22                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## $statistics
##     MSerror    Df     Mean       CV
##   0.2152815 49382 3.651051 12.70824
## 
## $parameters
##    test    name.t ntr StudentizedRange alpha
##   Tukey cell_type   9         4.386509  0.05
## 
## $means
##                    logFI       std     r      Min      Max      Q25      Q50
## Basal           3.338597 0.5385615   499 2.551993 4.704409 2.877252 3.176311
## CD271+ Ciliated 3.832046 0.3838757  5555 2.552025 4.900796 3.616941 3.858975
## CD66c- Other    3.721810 0.5003153   420 2.593093 5.077571 3.382227 3.801166
## CD66c+ Basal    3.737218 0.5572598   432 2.591476 4.852589 3.324435 3.816204
## CD66c+ Other    3.612335 0.5623695  3233 2.445936 5.048178 3.116385 3.674303
## Ciliated        3.687258 0.4280634 25634 2.370524 5.031748 3.438439 3.734646
## Goblet          3.502913 0.4375353   236 2.563614 4.862660 3.208079 3.576817
## Secretory       3.563014 0.5351201 11766 2.420612 5.009366 3.108390 3.582073
## Triple Negative 3.249685 0.4283957  1616 2.478223 4.744064 2.928697 3.220904
##                      Q75
## Basal           3.823677
## CD271+ Ciliated 4.096585
## CD66c- Other    4.096538
## CD66c+ Basal    4.174894
## CD66c+ Other    4.064042
## Ciliated        3.978415
## Goblet          3.752905
## Secretory       3.989544
## Triple Negative 3.522732
## 
## $comparison
## NULL
## 
## $groups
##                    logFI groups
## CD271+ Ciliated 3.832046      a
## CD66c+ Basal    3.737218      b
## CD66c- Other    3.721810      b
## Ciliated        3.687258      b
## CD66c+ Other    3.612335      c
## Secretory       3.563014      d
## Goblet          3.502913      d
## Basal           3.338597      e
## Triple Negative 3.249685      f
## 
## attr(,"class")
## [1] "group"
## [1] "ANOVA of round 7, week 3, nhbe_Pneumacult and NP logFI between cell types"
##                Df Sum Sq Mean Sq F value Pr(>F)    
## cell_type       8    631   78.89   366.5 <2e-16 ***
## Residuals   49382  10631    0.22                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## $statistics
##     MSerror    Df     Mean       CV
##   0.2152815 49382 3.651051 12.70824
## 
## $parameters
##    test    name.t ntr StudentizedRange alpha
##   Tukey cell_type   9         4.386509  0.05
## 
## $means
##                    logFI       std     r      Min      Max      Q25      Q50
## Basal           3.338597 0.5385615   499 2.551993 4.704409 2.877252 3.176311
## CD271+ Ciliated 3.832046 0.3838757  5555 2.552025 4.900796 3.616941 3.858975
## CD66c- Other    3.721810 0.5003153   420 2.593093 5.077571 3.382227 3.801166
## CD66c+ Basal    3.737218 0.5572598   432 2.591476 4.852589 3.324435 3.816204
## CD66c+ Other    3.612335 0.5623695  3233 2.445936 5.048178 3.116385 3.674303
## Ciliated        3.687258 0.4280634 25634 2.370524 5.031748 3.438439 3.734646
## Goblet          3.502913 0.4375353   236 2.563614 4.862660 3.208079 3.576817
## Secretory       3.563014 0.5351201 11766 2.420612 5.009366 3.108390 3.582073
## Triple Negative 3.249685 0.4283957  1616 2.478223 4.744064 2.928697 3.220904
##                      Q75
## Basal           3.823677
## CD271+ Ciliated 4.096585
## CD66c- Other    4.096538
## CD66c+ Basal    4.174894
## CD66c+ Other    4.064042
## Ciliated        3.978415
## Goblet          3.752905
## Secretory       3.989544
## Triple Negative 3.522732
## 
## $comparison
## NULL
## 
## $groups
##                    logFI groups
## CD271+ Ciliated 3.832046      a
## CD66c+ Basal    3.737218      b
## CD66c- Other    3.721810      b
## Ciliated        3.687258      b
## CD66c+ Other    3.612335      c
## Secretory       3.563014      d
## Goblet          3.502913      d
## Basal           3.338597      e
## Triple Negative 3.249685      f
## 
## attr(,"class")
## [1] "group"
## [1] "ANOVA of round 7, week 3, nhne_Pneumacult and HA logFI between cell types"
##                Df Sum Sq Mean Sq F value Pr(>F)    
## cell_type       8    631   78.89   366.5 <2e-16 ***
## Residuals   49382  10631    0.22                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## $statistics
##     MSerror    Df     Mean       CV
##   0.2152815 49382 3.651051 12.70824
## 
## $parameters
##    test    name.t ntr StudentizedRange alpha
##   Tukey cell_type   9         4.386509  0.05
## 
## $means
##                    logFI       std     r      Min      Max      Q25      Q50
## Basal           3.338597 0.5385615   499 2.551993 4.704409 2.877252 3.176311
## CD271+ Ciliated 3.832046 0.3838757  5555 2.552025 4.900796 3.616941 3.858975
## CD66c- Other    3.721810 0.5003153   420 2.593093 5.077571 3.382227 3.801166
## CD66c+ Basal    3.737218 0.5572598   432 2.591476 4.852589 3.324435 3.816204
## CD66c+ Other    3.612335 0.5623695  3233 2.445936 5.048178 3.116385 3.674303
## Ciliated        3.687258 0.4280634 25634 2.370524 5.031748 3.438439 3.734646
## Goblet          3.502913 0.4375353   236 2.563614 4.862660 3.208079 3.576817
## Secretory       3.563014 0.5351201 11766 2.420612 5.009366 3.108390 3.582073
## Triple Negative 3.249685 0.4283957  1616 2.478223 4.744064 2.928697 3.220904
##                      Q75
## Basal           3.823677
## CD271+ Ciliated 4.096585
## CD66c- Other    4.096538
## CD66c+ Basal    4.174894
## CD66c+ Other    4.064042
## Ciliated        3.978415
## Goblet          3.752905
## Secretory       3.989544
## Triple Negative 3.522732
## 
## $comparison
## NULL
## 
## $groups
##                    logFI groups
## CD271+ Ciliated 3.832046      a
## CD66c+ Basal    3.737218      b
## CD66c- Other    3.721810      b
## Ciliated        3.687258      b
## CD66c+ Other    3.612335      c
## Secretory       3.563014      d
## Goblet          3.502913      d
## Basal           3.338597      e
## Triple Negative 3.249685      f
## 
## attr(,"class")
## [1] "group"
## [1] "ANOVA of round 7, week 3, nhne_Pneumacult and NP logFI between cell types"
##                Df Sum Sq Mean Sq F value Pr(>F)    
## cell_type       8    631   78.89   366.5 <2e-16 ***
## Residuals   49382  10631    0.22                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## $statistics
##     MSerror    Df     Mean       CV
##   0.2152815 49382 3.651051 12.70824
## 
## $parameters
##    test    name.t ntr StudentizedRange alpha
##   Tukey cell_type   9         4.386509  0.05
## 
## $means
##                    logFI       std     r      Min      Max      Q25      Q50
## Basal           3.338597 0.5385615   499 2.551993 4.704409 2.877252 3.176311
## CD271+ Ciliated 3.832046 0.3838757  5555 2.552025 4.900796 3.616941 3.858975
## CD66c- Other    3.721810 0.5003153   420 2.593093 5.077571 3.382227 3.801166
## CD66c+ Basal    3.737218 0.5572598   432 2.591476 4.852589 3.324435 3.816204
## CD66c+ Other    3.612335 0.5623695  3233 2.445936 5.048178 3.116385 3.674303
## Ciliated        3.687258 0.4280634 25634 2.370524 5.031748 3.438439 3.734646
## Goblet          3.502913 0.4375353   236 2.563614 4.862660 3.208079 3.576817
## Secretory       3.563014 0.5351201 11766 2.420612 5.009366 3.108390 3.582073
## Triple Negative 3.249685 0.4283957  1616 2.478223 4.744064 2.928697 3.220904
##                      Q75
## Basal           3.823677
## CD271+ Ciliated 4.096585
## CD66c- Other    4.096538
## CD66c+ Basal    4.174894
## CD66c+ Other    4.064042
## Ciliated        3.978415
## Goblet          3.752905
## Secretory       3.989544
## Triple Negative 3.522732
## 
## $comparison
## NULL
## 
## $groups
##                    logFI groups
## CD271+ Ciliated 3.832046      a
## CD66c+ Basal    3.737218      b
## CD66c- Other    3.721810      b
## Ciliated        3.687258      b
## CD66c+ Other    3.612335      c
## Secretory       3.563014      d
## Goblet          3.502913      d
## Basal           3.338597      e
## Triple Negative 3.249685      f
## 
## attr(,"class")
## [1] "group"
run.anova(fi_dat, 7, 3, "nhbe_Pneumacult", "HA")
## [1] "ANOVA of round 7, week 3, nhbe_Pneumacult and HA logFI between cell types"
##                Df Sum Sq Mean Sq F value Pr(>F)    
## cell_type       8    631   78.89   366.5 <2e-16 ***
## Residuals   49382  10631    0.22                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## $statistics
##     MSerror    Df     Mean       CV
##   0.2152815 49382 3.651051 12.70824
## 
## $parameters
##    test    name.t ntr StudentizedRange alpha
##   Tukey cell_type   9         4.386509  0.05
## 
## $means
##                    logFI       std     r      Min      Max      Q25      Q50
## Basal           3.338597 0.5385615   499 2.551993 4.704409 2.877252 3.176311
## CD271+ Ciliated 3.832046 0.3838757  5555 2.552025 4.900796 3.616941 3.858975
## CD66c- Other    3.721810 0.5003153   420 2.593093 5.077571 3.382227 3.801166
## CD66c+ Basal    3.737218 0.5572598   432 2.591476 4.852589 3.324435 3.816204
## CD66c+ Other    3.612335 0.5623695  3233 2.445936 5.048178 3.116385 3.674303
## Ciliated        3.687258 0.4280634 25634 2.370524 5.031748 3.438439 3.734646
## Goblet          3.502913 0.4375353   236 2.563614 4.862660 3.208079 3.576817
## Secretory       3.563014 0.5351201 11766 2.420612 5.009366 3.108390 3.582073
## Triple Negative 3.249685 0.4283957  1616 2.478223 4.744064 2.928697 3.220904
##                      Q75
## Basal           3.823677
## CD271+ Ciliated 4.096585
## CD66c- Other    4.096538
## CD66c+ Basal    4.174894
## CD66c+ Other    4.064042
## Ciliated        3.978415
## Goblet          3.752905
## Secretory       3.989544
## Triple Negative 3.522732
## 
## $comparison
## NULL
## 
## $groups
##                    logFI groups
## CD271+ Ciliated 3.832046      a
## CD66c+ Basal    3.737218      b
## CD66c- Other    3.721810      b
## Ciliated        3.687258      b
## CD66c+ Other    3.612335      c
## Secretory       3.563014      d
## Goblet          3.502913      d
## Basal           3.338597      e
## Triple Negative 3.249685      f
## 
## attr(,"class")
## [1] "group"

Figure 3D

Shows replication dynamics of Cal/07/09 in NHBE cells. NHBE cells were differentiated for 3 weeks in Pneumacult media. This came from the round 3 differentiation.

The sequencing files were analyzed with the InVERT pipeline which produces csv files of mRNA, cRNA and vRNA counts for each influenza segment. Comparisons of m/c/vRNA ratios between ciliated and secretory cells, as well as total influenza TPM per cell type were graphed for the paper.

#Specify "not in" function
`%ni%` <- Negate(`%in%`)

meta_3d <- utils::read.csv("../data/figure_3d_metadata.csv", header = TRUE) %>%
  mutate(moi = as.factor(moi)) %>%
  mutate(replicate = as.factor(replicate)) %>%
  mutate(timepoint = as.factor(timepoint))
files_3d <- list.files(path = "../data/invert_data", pattern = ".csv")

files_3d <- paste0("../data/invert_data/", files_3d)

figure_3_dat <- readr::read_csv(files_3d, id = "path", na = "")
## New names:
## Rows: 288 Columns: 10
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (1): ...1 dbl (8): FPKM_positive_sense, FPKM_negative_sense,
## mrna:(mrna,crna)_ratio, T...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
names(figure_3_dat)[2] <- "gene"
names(figure_3_dat)[5] <- "mrna_to_total_pos_rna"

#Change the NA's that are calculated for positive or negative TPM values (when there are no FPKM values) to zero, except for the spliced genes
figure_3_dat[figure_3_dat$gene %ni% c("NS1", "NEP", "M1", "M2") & is.na(figure_3_dat$TPM_positive_sense), "TPM_positive_sense"] <- 0
figure_3_dat[figure_3_dat$gene %ni% c("NS1", "NEP", "M1", "M2") & is.na(figure_3_dat$TPM_negative_sense), "TPM_negative_sense"] <- 0

#Change NAs that are entered for cRNAs and vRNAs to zero, except for the spliced genes
figure_3_dat[figure_3_dat$gene %ni% c("NS1", "NEP", "M1", "M2") & is.na(figure_3_dat$crna_TPM), "crna_TPM"] <- 0
figure_3_dat[figure_3_dat$gene %ni% c("NS1", "NEP", "M1", "M2") & is.na(figure_3_dat$vrna_TPM), "vrna_TPM"] <- 0

#Change NAs that are entered for mRNAs to zero, except for M and NS which don't exist as mRNAs
figure_3_dat[figure_3_dat$gene %ni% c("M", "NS") & is.na(figure_3_dat$mrna_TPM), "mrna_TPM"] <- 0


figure_3_dat <- merge(meta_3d, figure_3_dat, by = "path") %>%
  dplyr::select(.,-c(path,FPKM_positive_sense, FPKM_negative_sense))

figure_3_dat_long <- figure_3_dat %>%
  tidyr::gather(key = "RNA_type", value = "TPM", -c(Sample, id, condition, moi, timepoint, cell_type, subset, replicate, gene, mrna_to_total_pos_rna)) %>%
  mutate(RNA_type = factor(RNA_type, levels = c("TPM_positive_sense", "TPM_negative_sense", "crna_TPM", "mrna_TPM", "vrna_TPM"))) %>%
  mutate(gene = factor(gene, levels = c("PB2", "PB1", "PA", "HA", "NP", "NA", "M", "M1", "M2", "NS", "NS1", "NEP"))) %>%
  group_by(cell_type,subset,gene,moi,timepoint,RNA_type) %>%
  dplyr::summarise(averageTPM = mean(TPM),
            num = n())
## `summarise()` has grouped output by 'cell_type', 'subset', 'gene', 'moi',
## 'timepoint'. You can override using the `.groups` argument.
#Make dataset which plots secretory and ciliated m/c/vRNA levels on the x and y axis
proportions.dat <- figure_3_dat_long %>%
  filter(RNA_type != "TPM_positive_sense", RNA_type != "TPM_negative_sense") %>%
  mutate(subset = factor(subset)) %>%
  mutate(cell_type = factor(cell_type)) %>%
  mutate(timepoint = as.numeric(as.character(timepoint))) %>%
  filter(!is.na(averageTPM)) %>%
  droplevels() %>%
  group_by(gene, subset, timepoint) %>%
  mutate(total_TPM = sum(averageTPM),
         proportion = averageTPM/total_TPM*100)



#Make new dataframe which will compare the ciliated values and secretory values on the x and y axis, respectively
proportion_regression_dat <- proportions.dat %>%
  filter(subset == "ciliated") %>%
  left_join(proportions.dat %>%
              filter(subset == "secretory"), 
            by = c("gene", "moi", "RNA_type", "timepoint"),
            suffix = c(".ciliated", ".secretory")) %>%
  mutate(timepoint = factor(timepoint))

Calculating correlation coefficient for combined dataset

cor.test(x = proportion_regression_dat$proportion.ciliated, 
         y = proportion_regression_dat$proportion.secretory, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  proportion_regression_dat$proportion.ciliated and proportion_regression_dat$proportion.secretory
## t = 68.107, df = 72, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.9878114 0.9951752
## sample estimates:
##       cor 
## 0.9923281

Actual p value from this test:

cor.test(x = proportion_regression_dat$proportion.ciliated, 
         y = proportion_regression_dat$proportion.secretory, method = "pearson")$p.value
## [1] 4.059059e-67
mcv_corr_plot <- ggplot(subset(proportion_regression_dat,timepoint != '0'),
               aes(x = proportion.ciliated,
                   y = proportion.secretory)) +
  geom_point(aes(shape = RNA_type,
                 color = gene),
                 #alpha = timepoint),
                 
             size = 3) +
  geom_smooth(method = "lm",
              color = "black",
              linewidth = 0.5,
              formula = y ~ x)+
  stat_cor(aes(label = ..rr.label..)) + 
  ggtitle("Correlation of m/c/vRNA proportions in ciliated and secretory cells\n0.5 MOI, JF291/FN02") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        panel.grid.major = element_blank())
  

mcv_corr_plot
## Warning: The dot-dot notation (`..rr.label..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(rr.label)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Comparing total IAV TPM in secretory vs ciliated cells with a T test at both 6 and 12 hours post infection

#Summarize the total IAV TPM
tpm_dat <- figure_3_dat %>%
  filter(moi == 0.5) %>%
  group_by(subset, replicate, moi, timepoint) %>%
  dplyr::summarise(total_pos_TPM = sum(TPM_positive_sense, na.rm = TRUE),
                   total_neg_TPM = sum(TPM_negative_sense, na.rm = TRUE)) %>%
  dplyr::mutate(total_TPM = total_pos_TPM+total_neg_TPM,
         subset = factor(subset, levels = c("ciliated", "secretory")))
## `summarise()` has grouped output by 'subset', 'replicate', 'moi'. You can
## override using the `.groups` argument.
#Calculate T tests for each group
ttest.results <- tpm_dat %>% 
   group_by(timepoint) %>% 
   do( tidy(t.test(data=., total_TPM~subset)))
ttest.results
## # A tibble: 2 × 11
## # Groups:   timepoint [2]
##   timepoint estimate estimate1 estimate2 statistic  p.value parameter conf.low
##   <fct>        <dbl>     <dbl>     <dbl>     <dbl>    <dbl>     <dbl>    <dbl>
## 1 6           -4349.     5409.     9759.     -6.89 0.000558      5.74   -5911.
## 2 12          13029.    52425.    39395.      2.04 0.130         3.12   -6847.
## # ℹ 3 more variables: conf.high <dbl>, method <chr>, alternative <chr>

Only differences at 6 hours are statistically significant. Make a bar plot of this data:

#Summarize mean and standard deviation
tpm_graph_dat <- summarySE(data=tpm_dat, measurevar = "total_TPM", groupvars = c("subset", "timepoint", "moi"))

annotation_df <- data.frame(
  timepoint = c(6, 12),
  start = c("ciliated", "ciliated"),
  end = c("secretory", "secretory"),
  y = c(60000, 60000),
  label = c("p=0.0006", "ns")
)

#Create bar plot which shows mean +/- standard deviation
ggplot(tpm_graph_dat, aes(x = subset, y = total_TPM)) +
    geom_bar(aes(fill = subset), stat = "identity") +
    geom_errorbar(aes(ymin=total_TPM-sd, ymax=total_TPM+sd),
                  size=.3,    # Thinner lines
                  width=.2,
                  position=position_dodge(.9)) +
    geom_signif(data = annotation_df,
                aes(xmin = start, xmax = end, annotations = label, y_position = y),
                manual = TRUE) + 
    facet_grid(moi~timepoint) +
    scale_fill_manual(values = c("#1080FF", "#8000FF")) + 
    ggtitle("Total IAV transcript counts (TPM)\nciliated vs secretory cells\n0.5 MOI, JF291/FN02") +
    labs(x = "Cell type",
         y = "Average TPM",
         caption = "Error bars show +/- standard deviation\nSignificance calculated with Welch's two sample t-test") +
    theme(panel.grid = element_blank(),
        panel.background = element_blank(),
        axis.line = element_line(colour = "black"),
        legend.position = "none")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning in geom_signif(data = annotation_df, aes(xmin = start, xmax = end, :
## Ignoring unknown aesthetics: xmin, xmax, annotations, and y_position
## Warning: Combining variables of class <factor> and <numeric> was deprecated in ggplot2
## 3.4.0.
## ℹ Please ensure your variables are compatible before plotting (location:
##   `combine_vars()`)
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Combining variables of class <numeric> and <factor> was deprecated in ggplot2
## 3.4.0.
## ℹ Please ensure your variables are compatible before plotting (location:
##   `combine_vars()`)
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Figure 4

Figure 4A

Data and graph files available upon request (generated in Prism).

Figure 4B

Shows PCA generated on infected phenotyping data generated via flow cytometry data, i.e. proportions of infected cell types present in the culture. Proportions were calculated by identifying cell type frequencies within the IAV+ cells in the culture, and scaling each proportion to the total %IAV+ cells (in other words, the proportions of each cell type will add up to the % infection in the culture.)

Points on the PCA are colored according to a supplementary variable of viral burst size, measured via area under the curve of a 36h multicycle growth curve (MCGC). Cultures differentiated for 1 or 3 weeks were infected with Cal/07/09 (MOI=0.1). Virus was harvested and plaqued on MDCK cells at 0, 12, 24, and 36 hours post infection. The PFU/total cell was calculated from each timepoint, and area under the curve was calculated as an average of n=5 technical replicates. The following culture/treatments are shown in the figure 4 data:

Cells differentiated for 3 weeks: * Pneumacult (PC) treated NHBE cells, infected at MOI=0.1 (rounds 3, 4, 7, 9) * PC + IL-13 low treated NHBE cells, infected at MOI=0.1 (rounds 3, 4, 7, 9) * PC + IL-13 high treated NHBE cells, infected at MOI=0.1 (rounds 3, 4, 7, 9) * PC + DAPT treated NHBE cells, infected at MOI=0.1 (rounds 7 and 9) * Lonza treated NHBE cells, infected at MOI=0.1 (rounds 7 and 9) * PC treated NHNE cells, infected at MOI=0.1 (round 7 only)

Cells differentiated for 1 week: * PC treated NHBE cells, infected at MOI=0.1 (round 7 only) * Lonza treated NHBE cells, infected at MOI=0.1 (round 7 only)

figure_4_dat <- read.csv("../data/nhbe_burst_size_data.csv", header = TRUE, row.names = 1)

scaled_inf_pca <- prcomp(figure_4_dat[,c(14:22)], 
                   scale = TRUE)

scaled_inf_pca_plot <- fviz_pca_biplot(scaled_inf_pca, repel = TRUE,
                col.ind = figure_4_dat$pfu_per_cell_36h_auc, 
                gradient.cols = c("blue", "red"), # Variables color
                title = "PCA biplot of *scaled* IAV+ phenotyping\nNo round 5 or week 0\n36 hr AUC",
                legend.title = list(color = "AUC 36h"))

scaled_inf_pca_plot

Figure 4C

Shows similar PCA to 4B, but PCA is performed on the uninfected phenotyping data. This phenotyping data largely overlaps with the data described in figure 2B, with the addition of a 1 week differentiated culture that was grown in Lonza media.

uninfected_pca <- prcomp(figure_4_dat[,c(5:13)], 
                   scale = TRUE)

uninfected_pca_plot <- fviz_pca_biplot(uninfected_pca, repel = TRUE,
                col.ind = figure_4_dat$pfu_per_cell_36h_auc, 
                gradient.cols = c("blue", "red"),
                title = "PCA biplot of pre-infection phenotyping\n36 hr AUC",
                legend.title = list(color = "AUC 36h"))

uninfected_pca_plot